package com.english.common;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * WebMagic {@link PageProcessor} that pulls the link and the title of every video entry
 * out of a single crawled page.
 *
 * <p>Constructing an instance immediately crawls the given URL with a single-threaded
 * {@link Spider}; the extracted values are then available through
 * {@link #getVideoHrefList()} and {@link #getTextHrefList()}. Both lists stay empty if
 * {@link #process(Page)} is never invoked.
 */
public class BiliBiliPageProcessor implements PageProcessor {
    /** XPath matching the anchor of each video entry; shared by the href and title queries. */
    private static final String VIDEO_ANCHOR_XPATH =
            "//li[@class='video-item matrix']//div[@class='headline clearfix']"
                    + "/a[@target='_blank' or @class='title']";
    /** XPath selecting the {@code href} attribute of each video anchor. */
    private static final String VIDEO_HREF_XPATH = VIDEO_ANCHOR_XPATH + "/@href";
    /** XPath selecting the {@code title} attribute of each video anchor. */
    private static final String VIDEO_TITLE_XPATH = VIDEO_ANCHOR_XPATH + "/@title";

    // Visibility of the two result lists is left package-private so that any existing
    // same-package access keeps compiling; new code should go through the getters.
    /** {@code href} values extracted from the most recently processed page. */
    List<String> videoList = new ArrayList<>();
    /** {@code title} values extracted from the most recently processed page. */
    List<String> textList = new ArrayList<>();

    /** Crawl settings: 3 retries and a sleep time of 100 between requests. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Creates the processor and crawls {@code videoUrl} right away using one spider thread.
     *
     * <p>NOTE(review): {@code Spider.run()} is expected to return only once the crawl has
     * finished, which is what makes the getters usable directly after construction —
     * confirm against the WebMagic version in use.
     *
     * @param videoUrl URL of the page to crawl; must not be {@code null}
     * @throws NullPointerException if {@code videoUrl} is {@code null}
     */
    public BiliBiliPageProcessor(String videoUrl) {
        // Fail fast with a clear message instead of an obscure failure inside the crawler.
        Objects.requireNonNull(videoUrl, "videoUrl must not be null");
        Spider.create(this).addUrl(videoUrl).thread(1).run();
    }

    /**
     * Extracts the video hrefs and titles from the downloaded page, replacing any values
     * stored by a previous call.
     *
     * <p>NOTE(review): the two lists come from two independent attribute queries, so they
     * only line up index-by-index if every matched anchor carries both attributes.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        videoList = html.xpath(VIDEO_HREF_XPATH).all();
        textList = html.xpath(VIDEO_TITLE_XPATH).all();
    }

    /**
     * Returns the crawl settings used by the spider.
     *
     * @return the site configuration of this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Returns the video links found on the processed page.
     *
     * @return the extracted {@code href} values; empty if no page has been processed
     */
    public List<String> getVideoHrefList() {
        return videoList;
    }

    /**
     * Returns the video titles found on the processed page.
     *
     * @return the extracted {@code title} values; empty if no page has been processed
     */
    public List<String> getTextHrefList() {
        return textList;
    }
}
